use "../processed/GHS_aggr.dta", clear

local mined = 6 // min year + 1
local maxed = 20
gen yschl = min(agelfted-5,`maxed')
tab yschl [fw=nobs], m
egen cy = group(datyear yearat14) 
// cohort-year group for clustering 
// (using cohort only would be impractical since # of clusters would be too small)
sum cy, d

local ctrl
forvalues j=1/4{
	local ctrl `ctrl' age_`j' yearat14_`j'
}
// First Stage
eststo: reg yschl drop15 `ctrl' [fw=nobs], vce(cluster cy)
qui foreach v in yschl drop15{
	reg `v' `ctrl' [fw=nobs]
	predict `v'_res, resid
}

// Decompose IV-OLS gap
gen xc = max(yschl-11,0) // 12th- year effect
local ctrlx
foreach w in `ctrl'{
	gen `w'x = `w'*yschl
	local ctrlx `ctrlx' `w'x
}
local ctrlxc
foreach w in `ctrl'{
	gen `w'xc = `w'*xc
	local ctrlxc `ctrlxc' `w'xc
}	
local xlist
forvalues j=`mined'/`maxed'{
	gen x`j' = (yschl>=`j')
	local xlist `xlist' x`j'
}
local op `"[fw=nobs], vce(cluster cy)"'

corr yschl_res drop15_res [fw=nobs], covar
scalar m_xx = r(Var_1)
scalar m_xz = r(cov_12)

matrix A = J(6,2,.)
matrix colnames A = "Coefficient" "StdError"

// OLS
reg learn yschl `ctrl' `op'
scalar b_ols = _b[yschl]
matrix A[1,1] = _b[yschl]
matrix A[1,2] = _se[yschl]
predict e_ols, resid
// IV
ivregress 2sls learn (yschl=drop15) `ctrl' `op'
scalar b_iv = _b[yschl]
matrix A[2,1] = _b[yschl]
matrix A[2,2] = _se[yschl]
predict e_iv, resid
// IV-OLS Gap
gen temp = e_iv*drop15_res/m_xz - e_ols*yschl_res/m_xx
reg temp `op'
drop temp
matrix A[3,1] = b_iv-b_ols
matrix A[3,2] = _se[_cons] 

// predict E[Y|X]
reg learn yschl `ctrlx' `ctrl' [fw=nobs]
predict mhat1, xb
predict vhat1, resid
reg drop15_res yschl `ctrlx' `ctrl' [fw=nobs]
predict zhat1, xb

reg learn `xlist' `ctrlxc' `ctrlx' `ctrl' [fw=nobs]
predict mhat2, xb
predict vhat2, resid
reg drop15_res `xlist' `ctrlxc' `ctrlx' `ctrl' [fw=nobs]
predict zhat2, xb

// covariate weight difference
ivregress 2sls mhat1 (yschl=drop15) `ctrl' `op'
scalar b_c = _b[yschl]
predict e_c, resid
gen temp = ( e_c*drop15_res + vhat1*zhat1 )/m_xz - e_ols*yschl_res/m_xx
reg temp `op'
drop temp
matrix A[4,1] = b_c - b_ols
matrix A[4,2] = _se[_cons]  

// treatment-level weight difference
ivregress 2sls mhat2 (yschl=drop15) `ctrl' [fw=nobs]
scalar b_ct = _b[yschl]
predict e_ct, resid
gen temp = ( (e_ct*drop15_res + vhat2*zhat2) - (e_c*drop15_res + vhat1*zhat1) )/m_xz
reg temp `op'
drop temp
matrix A[5,1] = b_ct - b_c
matrix A[5,2] = _se[_cons]  

// marginal effect difference
gen temp = ( e_iv*drop15_res - (e_ct*drop15_res + vhat2*zhat2) )/m_xz
reg temp `op'
drop temp
matrix A[6,1] = b_iv - b_ct
matrix A[6,2] = _se[_cons]  

clear
svmat A, names(col)
gen stat = ""
replace stat = "OLS" in 1
replace stat = "IV" in 2
replace stat = "IV-OLS" in 3
replace stat = "D_CW" in 4
replace stat = "D_TW" in 5
replace stat = "D_ME" in 6
order stat, first
export delimited using ../result/decom_base.csv, replace
